/* This program preps firm and estab level LBDs firm and estab and merges with the BRDIS
   data to create data for regressions.  
*/

clear
clear matrix
capture log close
set more off


log using "mk_geog_lbd_brdis_mylog.log", replace


******************************************
*** 1987 SIC to 2002 NAICS Concordance ***
******************************************

import excel using ${inter}1987_SIC_to_2002_NAICS.xls, first clear
rename SIC sic
rename NAICS naics
rename SICT sicdescrip
rename NAICST naicsdescrip
gen len = length(sic)
drop if len==0
gen zero = "0"
egen temp = concat(zero sic) if len==3
replace sic = temp if len==3 & temp!=""
drop zero temp len
gen sic3 = substr(sic,1,3)
gen naics4 = substr(naics,1,4)
contract sic3 naics4
drop _freq
bysort sic3: gen j=_n
reshape wide naics4, i(sic3) j(j)
save ${inter}sic3_naics4_wide.dta, replace

use ${inter}univsys_level_indweights.dta, clear
drop pre85
drop if sic3==0
tostring sic3, replace
gen len=length(sic3)
gen zero="0"
egen temp = concat(zero sic3) if len==2
replace sic3 = temp if len==2
drop temp len zero
merge m:1 sic3 using ${inter}sic3_naics4_wide.dta
drop if _merge==2
drop _merge
rename mfg_freq mfgfreq1
rename use_freq usefreq1
foreach num of numlist 2/26 {
   gen mfgfreq`num'=mfgfreq1
   replace mfgfreq`num'=. if naics4`num'==""
   gen usefreq`num'=usefreq1
   replace usefreq`num'=. if naics4`num'==""
}

reshape long naics4 mfgfreq usefreq, i(univsysname univsysnum sic3) j(j)
drop if naics4==""

collapse (sum) mfgfreq usefreq, by(univsysname univsysnum naics4)
save ${inter}univsys_naics4_indweights.dta, replace

bysort univsysnum: gen j=_n
reshape wide naics4 mfgfreq usefreq, i(univsysname univsysnum) j(j)

merge 1:m univsysnum using ${inter}univ_counties.dta
keep if _merge==3
drop _merge

rename surstcntyfips fips

duplicates tag univsysname univsysnum cont fips, gen(dup)
tab dup, miss
bysort dup univsysname univsysnum cont fips: drop if _n>1
drop dup

reshape long naics4 mfgfreq usefreq, i(univsysname univsysnum cont fips) j(indnum)

collapse (sum) mfgfreq usefreq, by(fips naics4)

tostring fips, replace
gen len = length(fips)
gen zero = "0"
egen temp = concat(zero fips) if len==4
replace fips = temp if len==4
drop len zero temp

save ${inter}fips_naics4_innovindex_75mi.dta, replace

************************
*** END SIC to NAICS ***
************************


******************
*** Prep BRDIS ***
******************

foreach year in 2008 2009 2010 2011 2012 {

use ${p1069}data/brdis/brdis`year'.dta, clear

gen yr = `year'

if yr==2008 | yr==2009 | yr==2012 {

foreach var in nord rd totrd wosal sales wotrd rdown outto usper india irlnd isral wages benft stock texps capex wbusp wscip wbthp wnewp colb fpcw flcw uncw npcw agree usfpc fofpc usfga usuni forga custo vendo compe profe intrn pphd visit gifts i5less comer ventu wftrd zyrd zxrd wemp wrdem emp rdemp inova inovb inovc inovd inove apply apop issue invp revep licen ipa ipb ipc ipd ipe ipf ipg iph ipi utili desig tmark copy trade mask {
   rename `var'_e `var'
   drop `var'_r `var'_a `var'_w `var'_f `var'_g `var'_i
}

}



if yr==2010 | yr==2011 { 

foreach x in r e a w f {
   rename i5les_`x' i5less_`x'
}

foreach var in nord rd totrd wosal sales wotrd rdown outto usper india irlnd isral wages benft stock texps capex wbusp wscip wbthp wnewp colb fpcw flcw uncw npcw agree usfpc fofpc usfga usuni forga custo vendo compe profe intrn pphd visit gifts i5less comer ventu wftrd zyrd zxrd wemp wrdem emp rdemp inova inovb inovc inovd inove apply apop issue invp revep licen ipa ipb ipc ipd ipe ipf ipg iph ipi utili desig tmark copy trade mask {
   rename `var'_e `var'
   drop `var'_r `var'_a `var'_w `var'_f 
}

}


keep yr lbdnum_c201200 alpha id einum brid status form naics stratm smpwgt wgt _mos  rspcde zip zip4 state nord rd totrd wosal sales wotrd rdown outto usper india irlnd isral wages benft stock texps capex wbusp wscip wbthp wnewp colb fpcw flcw uncw npcw agree usfpc fofpc usfga usuni forga custo vendo compe profe intrn pphd visit gifts i5less comer ventu wftrd zyrd zxrd wemp wrdem emp rdemp inova inovb inovc inovd inove apply apop issue invp revep licen ipa ipb ipc ipd ipe ipf ipg iph ipi utili desig tmark copy trade mask

rename lbdnum_c201200 lbdnum
rename emp domemp
rename state abrstate

gen insample = 1 if nord!=. | totrd!=.
drop if insample!=1


** CALC RD MEASURES **

gen posrd = 0 if totrd!=.
replace posrd = 1 if totrd>0 & totrd!=.
replace posrd = 1 if wotrd>0 & wotrd!=.
gen totrdpersales = totrd/sales
gen totrdperwosales = totrd/wosal

gen fracrdown = rdown/totrd
gen fracrdothers = outto/totrd

egen rdcomp = rsum(wages benft stock)
gen fracrdcomp = rdcomp/totrd
label var fracrdcomp "comp to RD emp as a frac of total RD exp"

gen wwfracrdemp = wrdem/wemp
gen domfracrdemp1 = rdemp/domemp

gen nform = 1 if form=="BRDI-1"
replace nform = 2 if form=="BRDI-1A" | form=="BRD-1S" | form=="BRDI-1B"

rename isral israel
rename irlnd ireland

** SPILLOVER MEASURES **

label var colb "pct of ww rd exp for agreements; 2008-2009"

gen Iagree = 0 if agree!=.
replace Iagree = 1 if agree>0 & agree!=.
foreach var in usfpc fofpc usfga usuni forga custo vendo compe {
   replace `var' = 0 if `var'==2
}

label var wftrd "total costs for RD funded by others"
label var zyrd "costs for fbo RD done by this company"
label var zxrd "costs for fbo RD done by subcontractors"
foreach var in wftrd zyrd zxrd {
   gen I`var' = 0 if `var'!=.
   replace I`var' = 1 if `var'>0 & `var'!=.
}

label var revep "amnt of your co's revenue from patents licensed to others"
foreach var in revep licen {
   gen I`var'=0 if `var'!=.
   replace I`var' = 1 if `var'>0 & `var'!=.
}


foreach l in a b c d e f g h i {
   replace ip`l' = 0 if ip`l'==2
}


** INNOVATION MEASURES **

foreach l in a b c d e {
   replace inov`l'= 0 if inov`l'==2
}

foreach var in apply issue {
   gen I`var' = 0 if `var'!=. 
   replace I`var' = 1 if `var'>0 & `var'!=.
}

foreach var in utili desig tmark copy trade mask {
   replace `var' = 1 if `var'== 2
   replace `var' = 0 if `var'== 3
}


** UNIVERSITY RELATIONSHIP MEASURES **

foreach var in profe intrn pphd visit gifts comer ventu { 
   replace `var'=0 if `var'==2
}
gen Iuncw = 0 if uncw!=.
replace Iuncw = 1 if uncw>0 & uncw!=.

** END CALC RD MEASURES **




*** PREPARE TO MERGE WITH LBD ***
* first match to lbd using alpha+four zeros on end (brdis) and firmid (lbd)
* then for what's left over, use id (brdis) and cfn (lbd).  should match 95%
gen zeros = "0000"
egen firmid = concat(alpha zeros) if alpha!=""
drop zeros
rename id cfn


preserve 
   keep if firmid==""
   drop if cfn==""
   duplicates tag cfn, gen(dup)
   drop if dup>0
   drop dup
   sort cfn
   rename yr year
   save ${inter}brdis_blankfirmids`year'.dta, replace
restore

drop if firmid==""
duplicates tag firmid, gen(dup)
tab dup, miss
drop if dup>0 & status=="I"
drop dup
duplicates tag firmid, gen(dup)
drop if dup>0 
drop dup

rename yr year

save ${inter}brdis_firmid`year'.dta, replace

}


use ${inter}brdis_blankfirmids2008.dta, clear
foreach year in 2009 2010 2011 2012 {
   append using ${inter}brdis_blankfirmids`year'.dta
} 
save ${inter}brdis_blankfirmids_allyrs.dta, replace

use ${inter}brdis_firmid2008.dta, clear
foreach year in 2009 2010 2011 2012 {
   append using ${inter}brdis_firmid`year'.dta
} 
save ${inter}brdis_firmid_allyrs.dta, replace


**********************
*** END BRDIS Prep ***
**********************


*****************
*** LBD FILES ***
*****************


foreach year in 2008 2009 2010 2011 2012 {
   use ${lbd}firm_long_new_emp.dta if year==`year', clear
   count
   count if firmage==.
   count if firmage==. & emp!=0 & emp!=.
   drop if firmage==.
   keep firmid year estabs firmage emp pay f_firstyear
   rename emp firmlbdemp
   rename pay firmlbdpay
   sort firmid
   drop if firmid==""
   duplicates tag firmid, gen(dup)
   drop if dup>0
   drop dup
   compress
   save ${inter}rdfirmlbd`year'.dta, replace
}


*** END FIRM LBD ***

foreach year in 2008 2009 2010 2011 2012 {

use ${lbd}lbd`year'.dta, clear

rename yr year

drop if firmid==""


keep if (act=="Y")
drop if (flaga=="D" | flaga=="6")
keep if pay>0 & pay!=.
drop if emp==. | emp==0
gen len = length(county)
drop if len<3
drop len
gen len = length(state)
drop if len<2
drop len

egen fips = concat(state county) 
drop if fips==""

destring state, gen(numstate) 
drop if numstate>56
drop if state==""
drop if county==""

sort flagb
egen estbcd = group(flagb)
label define estlife 1 "birth" 2 "continuer" 3 "death"
label values estbcd estlife

drop cbp*
replace naics = "" if naics=="00000000"
replace naics = bestnaics if naics=="" & bestnaics!=""
drop if naics=="" & bestnaics=="" & sic=="" & bestsic==""

replace sic = bestsic if (sic=="" | sic=="000000")

gen sic3 = substr(sic,1,3)
count if sic==""
disp r(N)/_N

duplicates tag cfn, gen(dup)
tab dup, miss
drop if dup==1 & emp==.
drop if dup==1 & emp==0
drop dup
duplicates tag cfn, gen(dup)
drop if dup>0
drop dup


count if naics==""
disp r(N)/_N
gen naics4 = substr(naics,1,4)
drop if naics4==""

do ${programs}fix_ind_codes_v1.do
drop if inlist(naics4,"1100","2111","5152","5251","5511","3611","5612","7121")


duplicates tag lbdnum, gen(dup)
gsort lbdnum dup -emp -pay
by lbdnum: drop if _n>1
drop dup

merge m:1 firmid year using ${inter}rdfirmlbd`year'.dta
keep if _merge==3
drop _merge

gen     es = 1 if emp>=1 & emp<=10
replace es = 2 if emp>10 & emp<=100
replace es = 3 if emp>100 & emp<=1000
replace es = 4 if emp>1000 & emp!=.


gen     fs = 1 if firmlbdemp>=1 & firmlbdemp<=10
replace fs = 2 if firmlbdemp>10 & firmlbdemp<=100
replace fs = 3 if firmlbdemp>100 & firmlbdemp<=1000
replace fs = 4 if firmlbdemp>1000 & firmlbdemp!=.

gen payperworker = pay/emp
count if pay==0
count

gen age = year - firstyear
disp "summary of estab age"
sum age, det
gen agegroup = 1 if age==0
replace agegroup = 2 if age>0 & age<=5
replace agegroup = 3 if age>5 & age<=10
replace agegroup = 4 if age>10 & age!=.

tab es if agegroup==1, miss
tab es if agegroup==2, miss
tab es if agegroup==3, miss
tab es if agegroup==4, miss

gen fag = 1 if firmage==0
replace fag = 2 if firmage>0 & firmage<=5
replace fag = 3 if firmage>5 & firmage<=10
replace fag = 4 if firmage>10 & firmage!=.

merge m:1 fips using ${inter}cntymsa_xwalk.dta
tab _merge
keep if _m==3
drop _merge


sort lbdnum
save ${inter}lbd`year'_lbdnum.dta, replace

}

use ${inter}lbd2008_lbdnum.dta, clear
append using ${inter}lbd2009_lbdnum.dta
append using ${inter}lbd2010_lbdnum.dta
append using ${inter}lbd2011_lbdnum.dta
append using ${inter}lbd2012_lbdnum.dta

keep year emp pay cfn mu sic3 naics4 state county zip fips firmid lbdnum firstyear lastyear estabs firmlbdemp firmlbdpay f_firstyear firmage es fs payperworker age agegroup fag msa nonmsa 
compress
save ${inter}temp_lbdall_lbdnum.dta, replace


*** MERGE BRDIS in ***

merge m:1 firmid year using ${inter}brdis_firmid_allyrs.dta
save ${inter}tmpbrdismerge.dta, replace
   keep if _merge==2
   drop nonmsa msa fag age agegroup payperworker fs es firmage f_firstyear firmlbdpay firmlbdemp emp pay estabs lastyear firstyear lbdnum zip county state fips mu sic3 naics4 
   drop _merge
   duplicates tag cfn, gen(dup)
   drop if dup>0
   drop dup
   save ${inter}brdis_failedfirmid.dta, replace
use ${inter}tmpbrdismerge.dta, clear
drop if _merge==2
rename _merge merge_firmid

merge 1:1 cfn year using ${inter}brdis_failedfirmid.dta
drop if _merge==2
drop _merge merge_firmid

merge 1:1 cfn year using ${inter}brdis_blankfirmids_allyrs.dta
preserve
   keep if _merge==2
   drop _merge
   duplicates tag cfn, gen(dup)
   tab dup, miss
   drop if dup>0
   save ${inter}brdis_blankfirmids_nolbdmerge.dta, replace
restore
drop if _merge==2
drop _merge

merge m:1 cfn using ${inter}brdis_blankfirmids_nolbdmerge.dta, update
drop if _merge==2
drop _merge

save ${inter}brdis_lbd_merged.dta, replace


 rm ${inter}tmpbrdismerge.dta

*** END BRDIS MERGE ***


use ${inter}brdis_lbd_merged.dta, clear


   keep if msa<=32
save ${inter}brdislbdpart1.dta, replace
use ${inter}brdis_lbd_merged.dta, clear 
   keep if msa>32 & msa<=520
save ${inter}brdislbdpart2.dta, replace
use ${inter}brdis_lbd_merged.dta, clear 
   keep if msa>520 & msa<=1600
save ${inter}brdislbdpart3.dta, replace
use ${inter}brdis_lbd_merged.dta, clear 
   keep if msa>1600 & msa<=2560
save ${inter}brdislbdpart4.dta, replace
use ${inter}brdis_lbd_merged.dta, clear 
   keep if msa>2560 & msa<=3740
save ${inter}brdislbdpart5.dta, replace
use ${inter}brdis_lbd_merged.dta, clear 
   keep if msa>3740 & msa<=5015
save ${inter}brdislbdpart6.dta, replace
use ${inter}brdis_lbd_merged.dta, clear 
   keep if msa>5015 & msa<=5640
save ${inter}brdislbdpart7.dta, replace
use ${inter}brdis_lbd_merged.dta, clear 
   keep if msa>5640 & msa<=6600
save ${inter}brdislbdpart8.dta, replace
use ${inter}brdis_lbd_merged.dta, clear 
   keep if msa>6600 & msa<=7560
save ${inter}brdislbdpart9.dta, replace
use ${inter}brdis_lbd_merged.dta, clear 
   keep if msa>7560 & msa<=9360
save ${inter}brdislbdpart10.dta, replace

foreach p of numlist 1/10 {

use ${inter}brdislbdpart`p'.dta, clear

 merge m:1 lbdnum using ${inter}sae_lbdnum.dta
 drop if _merge==2
 drop _merge

gen     fssae = 1 if sae>=1 & sae<=10
replace fssae = 2 if sae>10 & sae<=100
replace fssae = 3 if sae>100 & sae<=1000
replace fssae = 4 if sae>1000 & sae!=.

gen domfracrdemp2 = rdemp/firmlbdemp


egen uniindex = rowtotal(profe intrn pphd visit gifts comer usuni Iuncw)
replace uniindex = 1 if uniindex>0 & uniindex!=.

gen intpphd = 1 if intrn==1 | pphd==1
gen inovbe = 1 if inovb==1 | inove==1
gen inovcd = 1 if inovc==1 | inovd==1
gen ipab = 1 if ipa==1 | ipb==1
gen ipcd = 1 if ipc==1 | ipd==1
gen ipef = 1 if ipe==1 | ipf==1
gen iphi = 1 if iph==1 | ipi==1 

foreach var in profe intpphd visit gifts comer ventu ipab ipcd ipef ipg iphi usuni Iuncw Irevep Ilicen posrd Iagree usfpc usfga  Iwftrd inova utili inovbe inovcd uniindex  {
   foreach num in 1 2 3 4 {
      gen `var'fs`num' = 1 if `var'==1 & fs==`num' & firstyear>1980 & firstyear!=.  
      gen pre80`var'fs`num' = 1 if `var'==1 & fs==`num' & firstyear<=1980 & firstyear!=.
   }
      gen `var'fsall = 1 if `var'==1 & firstyear>1980 & firstyear!=.  
      gen pre80`var'fsall = 1 if `var'==1 & firstyear<=1980 & firstyear!=.
}

foreach num in 1 2 3 4 {
   gen uniindexfssae`num' = 1 if uniindex==1 & fssae==`num' & firstyear>1980 & firstyear!=.
   gen pre80uniindexfssae`num' = 1 if uniindex==1 & fssae==`num' & firstyear<=1980 & firstyear!=.
}

egen maxyear = max(year), by(lbdnum fips naics4)
foreach var in  profe intpphd visit gifts comer ventu ipab ipcd ipef ipg iphi usuni Iuncw Irevep Ilicen posrd Iagree usfpc usfga  Iwftrd inova utili inovbe inovcd uniindex {
   egen `var'cntfsall = max(`var'fsall), by(lbdnum fips naics4)
   replace `var'cntfsall = . if year!=maxyear & `var'cntfsall!=.
   gen pw`var'cntfsall =smpwgt*`var'cntfsall
   drop `var'cntfsall
   egen pre80`var'cntfsall = max(pre80`var'fsall), by(lbdnum fips naics4)
   replace pre80`var'cntfsall = . if year!=maxyear & pre80`var'cntfsall!=.
   gen pwpre80`var'cntfsall =smpwgt*pre80`var'cntfsall
   drop pre80`var'cntfsall
}

foreach num in 1 2 3 4 {
   foreach var in  profe intpphd visit gifts comer ventu ipab ipcd ipef ipg iphi usuni Iuncw Irevep Ilicen posrd Iagree usfpc usfga  Iwftrd inova utili inovbe inovcd uniindex {
      egen `var'cntfs`num' = max(`var'fs`num'), by(lbdnum fips naics4)
      replace `var'cntfs`num' = . if year!=maxyear & `var'cntfs`num'!=.
      gen pw`var'cntfs`num' = smpwgt*`var'cntfs`num'
      drop `var'cntfs`num'
      egen pre80`var'cntfs`num' = max(pre80`var'fs`num'), by(lbdnum fips naics4)
      replace pre80`var'cntfs`num' = . if year!=maxyear & pre80`var'cntfs`num'!=.
      gen pwpre80`var'cntfs`num' = smpwgt*pre80`var'cntfs`num'
      drop pre80`var'cntfs`num'
   }
}

foreach num in 1 2 3 4 {
      egen uniindexcntfssae`num' = max(uniindexfssae`num'), by(lbdnum fips naics4)
      replace uniindexcntfssae`num' = . if year!=maxyear & uniindexcntfssae`num'!=.
      gen pwuniindexcntfssae`num' = smpwgt*uniindexcntfssae`num' 
      drop uniindexcntfssae`num'
      egen pre80uniindexcntfssae`num' = max(pre80uniindexfssae`num'), by(lbdnum fips naics4)
      replace pre80uniindexcntfssae`num' = . if year!=maxyear & pre80uniindexcntfssae`num'!=.
      gen pwpre80uniindexcntfssae`num' = smpwgt*pre80uniindexcntfssae`num' 
      drop pre80uniindexcntfssae`num'

}

foreach var in totrdpersales fracrdothers domfracrdemp2 {
   foreach num in 1 2 3 4 {
      gen `var'fs`num' = `var' if fs==`num' 
   }
}


gen pwpre80apply = smpwgt*apply if firstyear<=1980
gen pwpost80apply = smpwgt*apply if firstyear>1980
gen pwpre80issue = smpwgt*issue if firstyear<=1980
gen pwpost80issue = smpwgt*issue if firstyear>1980

collapse (mean) totrdpersales fracrdothers domfracrdemp2 totrdpersalesfs1-domfracrdemp2fs4 (sum) pwprofecntfsall-pwpre80uniindexcntfsall pwprofecntfs1-pwpre80uniindexcntfs4 pwuniindexcntfssae1-pwpre80uniindexcntfssae4 pwpre80apply-pwpost80issue, by(msa nonmsa fips naics4)

save ${inter}brdislbdcollapsedp`p'.dta, replace

}


foreach p of numlist 1/9 {
   append using ${inter}brdislbdcollapsedp`p'.dta
}

foreach p of numlist 1/10 {
   rm ${inter}brdislbdcollapsedp`p'.dta
}

rename msa nummsa
tostring nummsa, gen(shortmsa)
gen len=length(shortmsa)
gen zero = "0"
egen temp1 = concat(zero shortmsa) if len==3
egen temp2 = concat(zero zero shortmsa) if len==2
egen temp3 = concat(zero zero zero shortmsa) if len==1
gen msa = shortmsa if len==4
replace msa=temp1 if len==3
replace msa=temp2 if len==2
replace msa=temp3 if len==1
count if msa==""
drop temp1 temp2 temp3 zero len shortmsa


capture rm ${inter}tempmerged.dta

********************
*** END LBD PREP ***
********************

merge 1:1 fips naics4 using ${inter}fips_naics4_innovindex_75mi.dta

drop if _merge==2
replace mfgfreq = 0 if _m==1
replace usefreq = 0 if _m==1
gen univcntyind = 0 if _m==1
replace univcntyind = 1 if _m==3
rename _m mergeindex
egen univcnty = max(univcntyind), by(fips)
tab univcnty, miss
preserve
   contract fips univcnty
   count
   drop _freq
   contract fips
   count
restore
drop univcntyind

save ${output_data}lbd_brdis_innovindex.dta, replace


log close
